** This Do-File replicates Figure A7 from P. Dutronc, A. Tondini "Large Means-Tested Pensions with Informal Labor Markets" ** 

* Do not pause the output window while the script runs.
set more off

* Working directory: fill in the folder where the PALMS v3.2 extract
* (only years from 2002--2015) is stored. The path is left blank on purpose.
cd ""

use "south_africa_palms.dta", clear

**************************************
** Select Population for Estimation **
**************************************

* 2008 and 2009 are the years in which the threshold is lowered from 65 to 60,
* so both years are excluded.
drop if inlist(year, 2008, 2009)

* Indicator for years after 2008. Stata treats a missing value as larger than
* any number, so an observation with missing year would also get post = 1.
generate post = (year > 2008)

* Keep pop_group 1 and 2 only (Black and Coloured individuals, see text for
* explanation).
keep if inlist(pop_group, 1, 2)

* Remove observations whose sex is system-missing or coded 9.
drop if sex == . | sex == 9

* 1/0 variable for marital status: one when marital_status equals 1.
generate married = (marital_status == 1)

* 1/0 variable equal to one when status equals 2, the code treated as
* self-employed in the informality definition further down.
generate self = (status == 2)

** Adjust weights for multiple year estimation **

* Divide every weight by the average weight of its own year, so that the
* weights have the same mean across waves.
* NOTE(review): the regressions later in this file weight by income_weight,
* not by this rescaled variable -- confirm that this is intended.
bysort year: egen mean_weight = mean(weight)
replace weight = weight / mean_weight

** Definition of Informal Employment **

* tot_inf is 1 for informal workers and 0 for formal workers and for the
* not employed. It stays missing whenever status or the relevant contract /
* registration variable does not match one of the cases below.
generate tot_inf = .

* Employees (status 1): informal without a written contract, formal with one.
replace tot_inf = 1 if status==1 & no_written_contract==1
replace tot_inf = 0 if status==1 & no_written_contract==0

* Self-employed (status 2): informal when the business is not registered,
* formal when it is.
replace tot_inf = 1 if status==2 & not_registered==1
replace tot_inf = 0 if status==2 & not_registered==0

* Not employed (status 3 or 4) are coded as not informal.
replace tot_inf = 0 if status==3 | status==4

* Exclude from the estimation those with missing informality status.
* This filter used to be run a second time after f_emp was created; the
* repeat could not drop anything and has been removed.
keep if tot_inf != .

* Formal employment: employed (emp equal to 1) and not informal.
generate f_emp = (tot_inf==0 & emp==1)

**  Wages and Hours **

*** Set Earnings in 2010 Rand **

* Price index by year, with 2010 as the base year (index equal to 100).
* The two lists below are matched by position. The 2008 and 2009 entries are
* retained although those years were dropped earlier in the file.
local cpi_years  2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
local cpi_values 69.3 73.3 72.8 74.3 76.7 81.4 89.6 96.1 100.0 105.0 111.1 117.5 124.7 130.3

generate cpi_index = .
local n_cpi : word count `cpi_years'
forvalues i = 1/`n_cpi' {
    local this_year  : word `i' of `cpi_years'
    local this_index : word `i' of `cpi_values'
    replace cpi_index = `this_index' if year == `this_year'
}

* Express monthly earnings in base-year prices.
replace monthly_sal = monthly_sal * (100/cpi_index)

* Estimation sample: pre-period observations (post equal to 0) with sex equal
* to 1 and emp equal to 1. The section headings below refer to this sample as
* the male working-age population.
keep if post==0 & sex==1 & emp==1


*******************************************
** Estimation of Formal Job Wage Premium **
*******************************************

* Hourly wage is monthly_sal divided by hrs_wrk times 4.5, then replaced by
* its natural log, so hourly_wage holds a LOG wage from here on.
* NOTE(review): this presumably treats hrs_wrk as weekly hours and a month as
* 4.5 weeks -- confirm against the PALMS documentation.
* Zero hours or non-positive wages give a missing value.
gen hourly_wage=monthly_sal/(hrs_wrk*4.5)
replace hourly_wage=ln(hourly_wage)

* Exclude observations with missing values for relevant variables with a
* significant share of missing values, to allow for comparability as controls
* are gradually included.
keep if firm_size_lfs!=. & jobstartyear!=.

* Keep only working-age population. A missing age is not below 65 in Stata,
* so observations with missing age are dropped here as well.
keep if age<65

* Every observation is assigned at random to one of three plotting groups
* (control equal to 1, 2 or 3, split at the 25th and 50th percentile of a
* uniform draw). The groups are only used as x-axis positions: each group later
* stores the estimate from one regression specification.
* The seed is fixed so that the assignment is identical on every run; the
* draw was previously unseeded.
set seed 20240607
gen cat=runiform()

summarize cat, det
gen control=1 if cat<r(p25)
replace control=2 if cat>=r(p25)
replace control=3 if cat>=r(p50)

* Containers for the point estimate, the upper and lower confidence bounds,
* and the R-squared. They are filled by the regressions that follow.
gen coef=.
gen ub=.
gen lb=.
gen r2=.


* Set of socio-demographic controls.
local socio i.pop_group i.age i.Province i.education i.marital_status

* Set of job controls.
local job i.firm_size_lfs i.occupation i.industry jobstartyear self

** Estimated on whole Male Working-age population **

* Three nested specifications, indexed like the control variable:
*   1 = year effects only, 2 = plus socio-demographics, 3 = plus job controls.
local spec1
local spec2 `socio'
local spec3 `socio' `job'

forvalues k = 1/3 {
    * [w=] on regress is interpreted as analytic weights; written out as aw.
    regress hourly_wage f_emp i.year `spec`k'' [aw=income_weight]

    * Store the f_emp coefficient and a 1.96 standard-error band on the
    * observations that belong to plotting group k.
    replace coef = _b[f_emp] if control==`k'
    replace ub = _b[f_emp] + _se[f_emp]*1.96 if control==`k'
    replace lb = _b[f_emp] - _se[f_emp]*1.96 if control==`k'

    * R-squared for the figure annotation, saved in r21, r22 and r23.
    * A display format is used instead of round(): it always yields exactly
    * two decimals with a leading zero, so the macro cannot pick up stray
    * floating-point digits.
    local r2`k' : display %4.2f e(r2)
}

** Estimated on whole 60--65 Working-age population **

* The same three specifications are re-estimated on observations aged 60 or
* more and under 65. The results overwrite coef, ub and lb on exactly those
* observations, so younger observations keep the full-sample estimates.
* The macros socio and job are the control sets defined earlier in the file.
local older age>=60 & age<65

local old_spec1
local old_spec2 `socio'
local old_spec3 `socio' `job'

forvalues k = 1/3 {
    * [w=] on regress is interpreted as analytic weights; written out as aw.
    regress hourly_wage f_emp i.year `old_spec`k'' if `older' [aw=income_weight]

    replace coef = _b[f_emp] if control==`k' & `older'
    replace ub = _b[f_emp]+_se[f_emp]*1.96 if control==`k' & `older'
    replace lb = _b[f_emp]-_se[f_emp]*1.96 if control==`k' & `older'
}

* Value labels for the three specifications, shown on the x-axis.
label define control 1 "Unconditional" 2 "+ SD Controls" 3 "SD + Job Controls"
label values control control

********************
*** Figure A 7 *****
********************

* Hollow circles: estimates stored on observations under 60 (full sample).
* Triangles with capped bars: estimates and 1.96 standard-error bands stored
* on observations aged 60 or more and under 65.
* The three text() annotations print the R-squared macros r21, r22 and r23
* just above the x-axis at each specification.
local older age>=60 & age<65

twoway ///
    (scatter coef control if age<60, mcolor(black) msymbol(circle_hollow)) ///
    (scatter coef control if `older', mcolor(black) msymbol(triangle)) ///
    (rcap ub lb control if `older', lcolor(black)) ///
    , xlabel(1 (1) 3, valuelabel angle(65)) xtitle("") ylabel(0 (0.2) 1) ///
    graphregion(color(white)) plotregion(fcolor(white)) ///
    legend(order(1 2) label(1 "All Males") label(2 "60--65")) ///
    text(0.05 1.07 "R^2=`r21'", size(small) tstyle(subheading) just(left)) ///
    text(0.05 2 "R^2=`r22'", size(small) tstyle(subheading) just(left)) ///
    text(0.05 3 "R^2=`r23'", size(small) tstyle(subheading) just(left))


